load library and data

GSEA_get_ranks <- function(res_input) {
  res_GSEA <- res_input %>%
    dplyr::select(SYMBOL, logFC_MMSE) %>%
    na.omit() %>% # remove NA
    distinct() %>% # remove duplicated
    group_by(SYMBOL) %>%
    summarize(logFC_MMSE = mean(logFC_MMSE))

  deframe(res_GSEA)
}

res_names <- DGE_list %>% names()
GSEA_ranks <- vector(mode = "list", length = length(res_names))
for (i in 1:length(GSEA_ranks)) {
  GSEA_ranks[[i]] <- GSEA_get_ranks(DGE_list[[i]])
}
names(GSEA_ranks) <- res_names
# GSEA_ranks %>% lapply(function(x){x %>% head(n= 1000 ) %>% tail(n=30)})
# running all GSEA using hallmark genesets
gsea_output_HM <- vector(mode = "list", length = length(GSEA_ranks))

for (i in 1:length(GSEA_ranks)) {
  gsea_output_HM[[i]] <- GSEA(sort(GSEA_ranks[[i]], decreasing = TRUE), TERM2GENE = h_gene_sets, pvalueCutoff = 1.0, eps = 0)
}

names(gsea_output_HM) <- res_names

gsea_df_HM <- gsea_output_HM %>% lapply(function(x) {
  x %>% as_tibble()
})
# %>% filter(padj < 0.2) %>%  arrange(desc(NES))
gsea_df_HM$`CDK2i200mpkQD_vs_control` %>% datatable(caption = "CDK2i200mpkQD_vs_control")
gsea_df_HM$`CDK2i100mpkBID_vs_control` %>% datatable(caption = "CDK2i100mpkBID_vs_control")
# running all GSEA using all the msigDB gene-sets of interest
gsea_output <- vector(mode = "list", length = length(GSEA_ranks))

for (i in 1:length(GSEA_ranks)) {
  gsea_output[[i]] <- GSEA(sort(GSEA_ranks[[i]], decreasing = TRUE), TERM2GENE = MSigDB.ofinterest, pvalueCutoff = 1.0, eps = 0)
}

names(gsea_output) <- res_names

saveRDS(gsea_output, "/researchers/krutika.ambani/Goel_lab_members/Cath_Dietrich/20230704_OV5398PDX_OVCAR3_ARC_Veh_RNA/R_analysis/OV598PDX/4.GSEA/data/rds/gesea_output.rds")
gsea_output <- readRDS("/researchers/krutika.ambani/Goel_lab_members/Cath_Dietrich/20230704_OV5398PDX_OVCAR3_ARC_Veh_RNA/R_analysis/OV598PDX/4.GSEA/data/rds/gesea_output.rds")

gsea_df <- gsea_output %>% lapply(function(x) {
  x %>% as_tibble()
})
gsea_df$`CDK2i200mpkQD_vs_control` %>% datatable(caption = "CDK2i200mpkQD_vs_control")
## Warning in instance$preRenderHook(instance): It seems your data is too big
## for client-side DataTables. You may consider server-side processing: https://
## rstudio.github.io/DT/server.html
gsea_df$`CDK2i100mpkBID_vs_control` %>% datatable(caption = "CDK2i100mpkBID_vs_control")
## Warning in instance$preRenderHook(instance): It seems your data is too big
## for client-side DataTables. You may consider server-side processing: https://
## rstudio.github.io/DT/server.html

plots - all genesets

combined_genesets_up <- gsea_df %>%
  lapply(function(x) {
    x %>%
      filter(p.adjust < 0.05 & NES > 0) %>%
      arrange(desc(NES)) %>%
      .$ID %>%
      head(n = 10)
  }) %>%
  unlist() %>%
  unique()

combined_genesets_down <- gsea_df %>%
  lapply(function(x) {
    x %>%
      filter(p.adjust < 0.05 & NES < 0) %>%
      arrange(NES) %>%
      .$ID %>%
      head(n = 10)
  }) %>%
  unlist() %>%
  unique()

combined_genesets <- c(combined_genesets_up, combined_genesets_down) %>% unique()
GSEA_names <- gsub("res_", "", names(gsea_df))

GSEA_ggplot <- data.frame()

for (i in 1:length(GSEA_names)) {
  temp <- gsea_df[[i]] %>%
    mutate(group = GSEA_names[i]) %>%
    dplyr::select(-leading_edge, -core_enrichment) %>%
    filter(ID %in% combined_genesets)
  GSEA_ggplot <- rbind(GSEA_ggplot, temp)
}

GSEA_ggplot$ID <- GSEA_ggplot$ID %>% factor(levels = combined_genesets)
GSEA_ggplot$group <- factor(GSEA_ggplot$group, levels = GSEA_names)

## Warning: Removed 5 rows containing missing values (`geom_point()`).

gseaplot(gsea_output$CDK2i200mpkQD_vs_control, geneSetID = c("SASP_Demaria"), title = "CDK2i200mpkQD_vs_control", pvalue_table = TRUE, base_size = 14)

gseaplot2(gsea_output$CDK2i200mpkQD_vs_control, geneSetID = c("SASP_Demaria"), title = "CDK2i200mpkQD_vs_control", color = c("#E495A5", "#86B875"), pvalue_table = TRUE, base_size = 14)

gseaplot2(gsea_output$CDK2i100mpkBID_vs_control, geneSetID = c("SASP_Demaria"), title = "CDK2i100mpkBID_vs_control", color = c("#E495A5", "#86B875"), pvalue_table = TRUE, base_size = 14)

plots - hallmark genesets

combined_genesets_up <- gsea_df_HM %>%
  lapply(function(x) {
    x %>%
      filter(p.adjust < 0.05 & NES > 0) %>%
      arrange(desc(NES)) %>%
      .$ID %>%
      head(n = 10)
  }) %>%
  unlist() %>%
  unique()

combined_genesets_down <- gsea_df_HM %>%
  lapply(function(x) {
    x %>%
      filter(p.adjust < 0.05 & NES < 0) %>%
      arrange(NES) %>%
      .$ID %>%
      head(n = 10)
  }) %>%
  unlist() %>%
  unique()

combined_genesets <- c(combined_genesets_up, combined_genesets_down) %>% unique()
GSEA_names <- gsub("res_", "", names(gsea_df_HM))

GSEA_ggplot <- data.frame()

for (i in 1:length(GSEA_names)) {
  temp <- gsea_df_HM[[i]] %>%
    mutate(group = GSEA_names[i]) %>%
    dplyr::select(-leading_edge, -core_enrichment) %>%
    filter(ID %in% combined_genesets)
  GSEA_ggplot <- rbind(GSEA_ggplot, temp)
}

GSEA_ggplot$ID <- GSEA_ggplot$ID %>% factor(levels = combined_genesets)
GSEA_ggplot$group <- factor(GSEA_ggplot$group, levels = GSEA_names)


# GSEA_ggplot$NES[GSEA_ggplot$p.adjust > 0.05] <- NA
GSEA_ggplot$NES %>% range()
# decided not to use this but the heatmap instead
GSEA_ggplot %>%
  ggplot(aes(x = group, y = ID, fill = NES)) +
  scale_fill_gradientn(colours = c("dodgerblue3", "white", "orange"), limits = c(-3, 3)) +
  geom_tile() +
  geom_tile(data = filter(GSEA_ggplot, p.adjust > 0.05), fill = "white") +
  geom_text(aes(label = sprintf("%0.2f", NES))) +
  theme_bw() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1, color = "black", size = 12),
    axis.text.y = element_text(color = "black", size = 10),
    axis.title.y = element_blank(),
    axis.title.x = element_blank(),
    legend.position = "bottom"
  ) +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())

GSEA_ggplot$NES %>% range()
GSEA_ggplot$p.adjust[GSEA_ggplot$p.adjust > 0.05] <- NA

# decided not to use this but the heatmap instead
GSEA_ggplot %>%
  ggplot(aes(x = group, y = ID, col = NES)) +
  # scale_fill_gradientn(colours = c("dodgerblue3","white", "orange"), limits = c(-4,4)) +
  scale_color_gradientn(colours = c("dodgerblue3", "white", "orange")) +
  geom_point(aes(size = -log10(p.adjust))) +

  theme_bw() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1, color = "black", size = 12),
    axis.text.y = element_text(color = "black", size = 10),
    axis.title.y = element_blank(),
    axis.title.x = element_blank(),
    legend.position = "right"
  )
## Warning: Removed 5 rows containing missing values (`geom_point()`).

gseaplot(gsea_output_HM$CDK2i200mpkQD_vs_control, geneSetID = c("HALLMARK_E2F_TARGETS"), title = "CDK2i200mpkQD_vs_control", pvalue_table = TRUE, base_size = 14)

gseaplot2(gsea_output_HM$CDK2i200mpkQD_vs_control, geneSetID = c("HALLMARK_E2F_TARGETS"), title = "CDK2i200mpkQD_vs_control", color = c("#E495A5", "#86B875"), pvalue_table = TRUE, base_size = 14)

gseaplot2(gsea_output_HM$CDK2i100mpkBID_vs_control, geneSetID = c("HALLMARK_E2F_TARGETS"), title = "CDK2i100mpkBID_vs_control", color = c("#E495A5", "#86B875"), pvalue_table = TRUE, base_size = 14)